Calculate Well Trajectory from Different Data Sources

This notebook shows examples of how to calculate a well trajectory from different data sources. It currently accepts data from a CSV file, a DataFrame, a dictionary, or JSON.

CSV and DataFrame input must follow the columnar layout commonly seen in directional surveys (one row per survey station). Dictionary and JSON input must follow a format specific to this library, sketched below.
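As a rough guide, here is a minimal sketch of the library-specific dictionary/JSON layout, inferred from the keys shown later in this notebook. The values below are made up for illustration, and md, inc, and azim are assumed to be per-station arrays of equal length:

# hypothetical example of the library-specific dictionary layout
well_dict = {
    'wellId': 'well_example',
    'md': [0.0, 35.0, 774.81],           # measured depth at each survey station
    'inc': [0.0, 0.0, 0.46],             # inclination in degrees
    'azim': [227.11, 227.11, 227.11],    # azimuth in degrees
    'surface_latitude': 29.908294,       # single surface location per well
    'surface_longitude': 47.688521,
}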

[13]:
from welltrajconvert.wellbore_trajectory import *
from welltrajconvert.data_source import *

Get Wellbore Trajectory object

[14]:
path = './'

get_files is a convenient function for collecting the data files in a folder. You can grab every file in a specific folder, or only the files with a specific extension, and it returns the matching paths in a list via the .items attribute.

[3]:
# use get_files to get all the files in the data directory
file_paths = get_files(path, folders='data')
# .items gives a pathlib Path for every file in the chosen folder
file_paths.items
[3]:
[WindowsPath('data/wellbore_survey.csv'),
 WindowsPath('data/wellbore_survey.json'),
 WindowsPath('data/wellbore_survey_many.csv'),
 WindowsPath('data/well_export.json')]

From Dict

[18]:
json_path = get_files(path, folders='data', extensions='.json')

# the with block closes the file automatically
with open(json_path.items[0]) as json_file:
    json_data = json.load(json_file)

# show the keys present in the dict
json_data.keys()
[18]:
dict_keys(['wellId', 'md', 'inc', 'azim', 'surface_latitude', 'surface_longitude'])
[4]:
# call DataSource.from_dictionary and pass in the dict data
my_data = DataSource.from_dictionary(json_data)

# view the DataSource dataclass object
my_data.data
# create a WellboreTrajectory object
dev_obj = WellboreTrajectory(my_data.data)
# view the deviation survey object
dev_obj.deviation_survey_obj
# calculate the survey points along the wellbore
dev_obj.calculate_survey_points()
# serialize the calculated data to a JSON string
json_ds = dev_obj.serialize()

# view the json in a df
json_ds_obj = json.loads(json_ds)
df_min_curve = pd.DataFrame(json_ds_obj)
df_min_curve.head()
[4]:
wellId md inc azim tvd e_w_deviation n_s_deviation dls surface_latitude surface_longitude longitude_points latitude_points zone_number zone_letter x_points y_points surface_x surface_y isHorizontal
0 well_C 0.00 0.00 227.11 0.000000 0.000000e+00 0.000000e+00 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.934440 3.311662e+06 759587.93444 3.311662e+06 Vertical
1 well_C 35.00 0.00 227.11 35.000000 -1.484001e-17 -1.378531e-17 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.934440 3.311662e+06 759587.93444 3.311662e+06 Vertical
2 well_C 774.81 0.46 227.11 774.802052 -2.175842e+00 -2.021203e+00 0.062178 29.908294 47.688521 47.688514 29.908289 38 R 759587.271243 3.311661e+06 759587.93444 3.311662e+06 Vertical
3 well_C 800.00 0.13 163.86 799.991684 -2.241984e+00 -2.117474e+00 1.312323 29.908294 47.688521 47.688514 29.908289 38 R 759587.251083 3.311661e+06 759587.93444 3.311662e+06 Vertical
4 well_C 900.00 0.57 230.43 899.989571 -2.593878e+00 -2.543311e+00 0.439221 29.908294 47.688521 47.688512 29.908288 38 R 759587.143826 3.311661e+06 759587.93444 3.311662e+06 Vertical
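Because serialize() returns a JSON string (as the json.loads call above suggests), you can also write the calculated survey straight to a file. A minimal sketch, using a hypothetical output file name:

# write the serialized survey to disk (the file name is just an example)
with open('calculated_survey.json', 'w') as out_file:
    out_file.write(json_ds)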

From DF

If you already have a DataFrame and want to calculate its survey points, use the following workflow: pass the column names to DataSource.from_df and then calculate the survey points.

[5]:
# FROM DF
csv_path = get_files(path, folders='data', extensions='.csv')

df = pd.read_csv(csv_path.items[0], sep=',')
df.head()
[5]:
wellId md inc azim surface_latitude surface_longitude
0 well_C 0.00 0.00 227.11 29.908294 47.688521
1 well_C 35.00 0.00 227.11 29.908294 47.688521
2 well_C 774.81 0.46 227.11 29.908294 47.688521
3 well_C 800.00 0.13 163.86 29.908294 47.688521
4 well_C 900.00 0.57 230.43 29.908294 47.688521
[6]:
# call the from_df method, filling in the parameters with the column names
my_data = DataSource.from_df(df, wellId_name='wellId', md_name='md', inc_name='inc', azim_name='azim',
                             surface_latitude_name='surface_latitude', surface_longitude_name='surface_longitude')

# view the DataSource dataclass object
my_data.data
# create a WellboreTrajectory object
dev_obj = WellboreTrajectory(my_data.data)
# view the deviation survey object
dev_obj.deviation_survey_obj
# calculate the survey points along the wellbore
dev_obj.calculate_survey_points()
# serialize the calculated data to a JSON string
json_ds = dev_obj.serialize()

# view the json in a df
json_ds_obj = json.loads(json_ds)
df_min_curve = pd.DataFrame(json_ds_obj)
df_min_curve.head()
[6]:
wellId md inc azim tvd e_w_deviation n_s_deviation dls surface_latitude surface_longitude longitude_points latitude_points zone_number zone_letter x_points y_points surface_x surface_y isHorizontal
0 well_C 0.00 0.00 227.11 0.000000 0.000000e+00 0.000000e+00 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.934440 3.311662e+06 759587.93444 3.311662e+06 Vertical
1 well_C 35.00 0.00 227.11 35.000000 -1.484001e-17 -1.378531e-17 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.934440 3.311662e+06 759587.93444 3.311662e+06 Vertical
2 well_C 774.81 0.46 227.11 774.802052 -2.175842e+00 -2.021203e+00 0.062178 29.908294 47.688521 47.688514 29.908289 38 R 759587.271243 3.311661e+06 759587.93444 3.311662e+06 Vertical
3 well_C 800.00 0.13 163.86 799.991684 -2.241984e+00 -2.117474e+00 1.312323 29.908294 47.688521 47.688514 29.908289 38 R 759587.251083 3.311661e+06 759587.93444 3.311662e+06 Vertical
4 well_C 900.00 0.57 230.43 899.989571 -2.593878e+00 -2.543311e+00 0.439221 29.908294 47.688521 47.688512 29.908288 38 R 759587.143826 3.311661e+06 759587.93444 3.311662e+06 Vertical

From CSV

If you have a CSV file and want to calculate its survey points, use the following workflow: pass the file path and column names to DataSource.from_csv and then calculate the survey points.

[7]:
csv_path = get_files(path, folders='data', extensions='.csv')

# call the from_csv method, filling in the parameters with the column names
my_data = DataSource.from_csv(csv_path.items[0], wellId_name='wellId', md_name='md', inc_name='inc', azim_name='azim',
                              surface_latitude_name='surface_latitude', surface_longitude_name='surface_longitude')

# view the DataSource dataclass object
my_data.data
# create a WellboreTrajectory object
dev_obj = WellboreTrajectory(my_data.data)
# view the deviation survey object
dev_obj.deviation_survey_obj
# calculate the survey points along the wellbore
dev_obj.calculate_survey_points()
# serialize the calculated data to a JSON string
json_ds = dev_obj.serialize()

# view the json in a df
json_ds_obj = json.loads(json_ds)
df_min_curve = pd.DataFrame(json_ds_obj)
df_min_curve.head()
[7]:
wellId md inc azim tvd e_w_deviation n_s_deviation dls surface_latitude surface_longitude longitude_points latitude_points zone_number zone_letter x_points y_points surface_x surface_y isHorizontal
0 well_C 0.00 0.00 227.11 0.000000 0.000000e+00 0.000000e+00 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.934440 3.311662e+06 759587.93444 3.311662e+06 Vertical
1 well_C 35.00 0.00 227.11 35.000000 -1.484001e-17 -1.378531e-17 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.934440 3.311662e+06 759587.93444 3.311662e+06 Vertical
2 well_C 774.81 0.46 227.11 774.802052 -2.175842e+00 -2.021203e+00 0.062178 29.908294 47.688521 47.688514 29.908289 38 R 759587.271243 3.311661e+06 759587.93444 3.311662e+06 Vertical
3 well_C 800.00 0.13 163.86 799.991684 -2.241984e+00 -2.117474e+00 1.312323 29.908294 47.688521 47.688514 29.908289 38 R 759587.251083 3.311661e+06 759587.93444 3.311662e+06 Vertical
4 well_C 900.00 0.57 230.43 899.989571 -2.593878e+00 -2.543311e+00 0.439221 29.908294 47.688521 47.688512 29.908288 38 R 759587.143826 3.311661e+06 759587.93444 3.311662e+06 Vertical

From CSV with multiple wells

If you have a file with multiple wells appended one after another, you can use the simple function below to split them by wellId, run each well through welltrajconvert, and calculate each well's survey points. You can then convert the resulting list of serialized surveys into a DataFrame.

[8]:
def from_multiple_wells_to_dict(df: DataFrame, wellId_name: Optional[str] = None, md_name: Optional[str] = None,
                                inc_name: Optional[str] = None, azim_name: Optional[str] = None,
                                surface_latitude_name: Optional[str] = None,
                                surface_longitude_name: Optional[str] = None,
                                surface_x_name: Optional[str] = None,
                                surface_y_name: Optional[str] = None):
    """
    Takes a df of multiple well deviation surveys in a typical columnar fashion and calculates their survey points,
    appending each serialized result to a list.

    :parameter: df

    :returns: list of serialized deviation surveys (JSON strings)
    """
    # group by wellId; this works for a single well or multiple wells
    grouped = df.groupby(wellId_name)

    # initialize an empty list to collect the serialized surveys
    dict_list = []
    # loop through the groups, converting each one to the proper dict format
    for name, group in grouped:

        group.reset_index(inplace=True, drop=True)

        if surface_latitude_name is not None and surface_longitude_name is not None:
            well_obj = DataSource.from_df(group, wellId_name=wellId_name, md_name=md_name,
                                          inc_name=inc_name, azim_name=azim_name,
                                          surface_latitude_name=surface_latitude_name,
                                          surface_longitude_name=surface_longitude_name)
        elif surface_x_name is not None and surface_y_name is not None:
            well_obj = DataSource.from_df(group, wellId_name=wellId_name, md_name=md_name,
                                          inc_name=inc_name, azim_name=azim_name,
                                          surface_x_name=surface_x_name,
                                          surface_y_name=surface_y_name)
        else:
            raise ValueError('Provide either surface latitude/longitude or surface x/y column names.')

        well_obj = WellboreTrajectory(well_obj.data)
        well_obj.calculate_survey_points()
        json_ds = well_obj.serialize()
        dict_list.append(json_ds)

    return dict_list
[9]:
# FROM df with multiple wells
csv_path = get_files(path, folders='data', extensions='.csv')

df = pd.read_csv(csv_path.items[1])
# show the unique well ids
print(df['wellId'].unique())
df.head()
['well_C' 'well_A' 'well_B']
[9]:
wellId md inc azim surface_latitude surface_longitude
0 well_C 0.00 0.00 227.11 29.908294 47.688521
1 well_C 35.00 0.00 227.11 29.908294 47.688521
2 well_C 774.81 0.46 227.11 29.908294 47.688521
3 well_C 800.00 0.13 163.86 29.908294 47.688521
4 well_C 900.00 0.57 230.43 29.908294 47.688521
[10]:
# call the function, filling in the column name parameters
dicts = from_multiple_wells_to_dict(df, wellId_name='wellId', md_name='md', inc_name='inc', azim_name='azim',
                                    surface_latitude_name='surface_latitude', surface_longitude_name='surface_longitude')
# dicts[:]  # uncomment to inspect the serialized results
[11]:
def list_of_dicts_to_df(dict_list):
    """takes a list of serialized surveys and converts them to an appended df"""
    # parse each JSON string into a DataFrame, then concatenate them all at once
    # (DataFrame.append was deprecated and removed in pandas 2.0)
    frames = []
    for i in dict_list:
        json_ds_obj = json.loads(i)
        df_well_obj = pd.DataFrame(json_ds_obj)
        frames.append(df_well_obj)
    appended_df = pd.concat(frames)
    return appended_df
[12]:
# convert the list of serialized surveys into a single df
df = list_of_dicts_to_df(dicts)
df.head()
[12]:
wellId md inc azim tvd e_w_deviation n_s_deviation dls surface_latitude surface_longitude longitude_points latitude_points zone_number zone_letter x_points y_points surface_x surface_y isHorizontal
0 well_A 0.0000 0.00 227.11 0.000000 0.000000e+00 0.000000e+00 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.932586 3.311662e+06 759587.932586 3.311662e+06 Vertical
1 well_A 36.7500 0.00 227.11 36.750000 -1.558201e-17 -1.447458e-17 0.000000 29.908294 47.688521 47.688521 29.908294 38 R 759587.932586 3.311662e+06 759587.932586 3.311662e+06 Vertical
2 well_A 813.5505 0.46 227.11 813.542155 -2.284634e+00 -2.122263e+00 0.059217 29.908294 47.688521 47.688513 29.908289 38 R 759587.236230 3.311661e+06 759587.932586 3.311662e+06 Vertical
3 well_A 840.0000 0.13 163.86 839.991268 -2.354083e+00 -2.223348e+00 1.249832 29.908294 47.688521 47.688513 29.908289 38 R 759587.215062 3.311661e+06 759587.932586 3.311662e+06 Vertical
4 well_A 945.0000 0.57 230.43 944.989049 -2.723571e+00 -2.670477e+00 0.418306 29.908294 47.688521 47.688512 29.908287 38 R 759587.102442 3.311661e+06 759587.932586 3.311662e+06 Vertical
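If you want to keep the combined results, the appended DataFrame can be exported with standard pandas. A minimal sketch, using a hypothetical output file name:

# export the combined survey points to a CSV file (the file name is just an example)
df.to_csv('all_wells_survey_points.csv', index=False)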